import re
import pandas as pd

# Matches a single word: a run of letters/digits, optionally joined by
# internal apostrophes so contractions such as "don't" stay in one piece.
# Extracting words (instead of splitting on a hand-written punctuation class)
# avoids the accidental ")-?" character range of the earlier pattern, which
# also swallowed digits, and never yields empty tokens.
WORD_PATTERN = re.compile(r"[^\W_]+(?:'[^\W_]+)*")


def split_words(text):
    """Return the words found in *text*, in order of appearance.

    Whitespace and punctuation act as separators and are discarded.
    Digits are kept as part of words, and case is preserved.

    Args:
        text: The string to tokenize.

    Returns:
        A list of word strings; empty when *text* contains no words.
    """
    return WORD_PATTERN.findall(text)


def top_words(words, limit=20):
    """Return the *limit* most frequent entries of *words*.

    Args:
        words: An iterable of word strings.
        limit: Maximum number of entries to return (default 20).

    Returns:
        A pandas Series mapping each word to its count, sorted by
        descending frequency.
    """
    # An explicit dtype keeps an empty word list from triggering pandas'
    # dtype inference for empty data.
    return pd.Series(words, dtype="object").value_counts().head(limit)


if __name__ == "__main__":
    # NOTE(review): assumes the text file is UTF-8 encoded; adjust the
    # encoding here if the file was saved differently.
    with open('The Joy of Reading.txt', 'r', encoding='utf-8') as f:
        content = f.read()

    # Strip punctuation, then report the 20 most frequent words.
    words = split_words(content)
    print(top_words(words))
